/*******************************************************************************
 Copyright (c) 2021 Arm  Corporation All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
   * Neither the name of Intel Corporation nor the names of its contributors
     may be used to endorse or promote products derived from this software
     without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/

#ifndef __AESNI_EMU_AARCH64_INC_
#define __AESNI_EMU_AARCH64_INC_

// Slot sizes in bytes: one general-purpose (x) register, one SIMD (q)
// register, and one 128-bit argument buffer passed by address to the callee.
#define GP_SZ 8
#define SIMD_SZ 16
#define ARG_SZ 16

// General-purpose registers spilled around the call: x0 - x18, x29 and x30.
#define NUM_CALLER_SAVED_GP 21
// SIMD registers spilled around the call: q0 - q7 and q16 - q31.
#define NUM_CALLER_SAVED_SIMD 24

// Frame layout, as byte offsets from sp once the frame has been reserved:
//   [0, 176)    general-purpose save area; 21 slots plus one 8-byte pad slot
//               so that the SIMD area starts on a 16-byte boundary
//   [176, 560)  SIMD save area, 24 slots of 16 bytes
//   [560, 576)  first argument buffer  (ARG1_OFFSET)
//   [576, 592)  second argument buffer (ARG2_OFFSET)
//   592         total frame size       (RES_STACK_SZ)
#define ARG1_OFFSET ((NUM_CALLER_SAVED_SIMD * SIMD_SZ) + ((NUM_CALLER_SAVED_GP + 1) * GP_SZ))
#define ARG2_OFFSET (ARG_SZ + ARG1_OFFSET)
#define RES_STACK_SZ (ARG_SZ + ARG2_OFFSET)

// CLEAR_STACK
// Zeroes the SIMD save area and both argument buffers of the current frame.
// The general-purpose save area at the bottom of the frame is left as is.
// Leaves the assembler symbol INDEX set to the end of the SIMD save area.
.macro CLEAR_STACK
	// SIMD save area: starts right after the padded GP save area and holds
	// NUM_CALLER_SAVED_SIMD slots; each 16-byte slot takes two 8-byte stores.
.set INDEX, ((NUM_CALLER_SAVED_GP + 1) * GP_SZ)
.rept NUM_CALLER_SAVED_SIMD
	str	xzr, [sp, INDEX]
	str	xzr, [sp, INDEX + GP_SZ]
.set INDEX, (INDEX + SIMD_SZ)
.endr
	// Argument buffers: 16 bytes each, again as two 8-byte stores.
	str	xzr, [sp, ARG1_OFFSET]
	str	xzr, [sp, ARG1_OFFSET + GP_SZ]
	str	xzr, [sp, ARG2_OFFSET]
	str	xzr, [sp, ARG2_OFFSET + GP_SZ]
.endm

// CALLER_SAVE_GP
// Reserves the whole RES_STACK_SZ frame and spills x29, x30 and x0 - x18 into
// its first 168 bytes. Must be paired with CALLER_RESTORE_GP, which releases
// the frame again.
.macro CALLER_SAVE_GP
	sub	sp, sp, #RES_STACK_SZ		// reserve GP + SIMD + argument areas
	stp	x29, x30, [sp, #0]		// frame pointer and link register first
	stp	x0, x1, [sp, #16]
	stp	x2, x3, [sp, #32]
	stp	x4, x5, [sp, #48]
	stp	x6, x7, [sp, #64]
	stp	x8, x9, [sp, #80]
	stp	x10, x11, [sp, #96]
	stp	x12, x13, [sp, #112]
	stp	x14, x15, [sp, #128]
	stp	x16, x17, [sp, #144]
	str	x18, [sp, #160]			// odd register out; slot at 168 is padding
.endm

// CALLER_SAVE_SIMD
// Spills q0 - q7 and q16 - q31 into the SIMD save area, which occupies byte
// offsets 176 to 559 of the frame reserved by CALLER_SAVE_GP.
// NOTE(review): q8 - q15 are not spilled here. AAPCS64 only requires a callee
// to preserve the low 64 bits of v8 - v15, so their upper halves may not
// survive the call - confirm this is acceptable for users of these macros.
.macro CALLER_SAVE_SIMD
	stp	q0, q1, [sp, #176]
	stp	q2, q3, [sp, #208]
	stp	q4, q5, [sp, #240]
	stp	q6, q7, [sp, #272]
	// q8 - q15 are skipped; the next slot holds q16
	stp	q16, q17, [sp, #304]
	stp	q18, q19, [sp, #336]
	stp	q20, q21, [sp, #368]
	stp	q22, q23, [sp, #400]
	stp	q24, q25, [sp, #432]
	stp	q26, q27, [sp, #464]
	stp	q28, q29, [sp, #496]
	stp	q30, q31, [sp, #528]
.endm

// CALLER_RESTORE_GP
// Reloads x29, x30 and x0 - x18 from the slots written by CALLER_SAVE_GP and
// then releases the RES_STACK_SZ frame. Nothing in the frame may be accessed
// through sp after this macro.
.macro CALLER_RESTORE_GP
	ldp	x29, x30, [sp, #0]
	ldp	x0, x1, [sp, #16]
	ldp	x2, x3, [sp, #32]
	ldp	x4, x5, [sp, #48]
	ldp	x6, x7, [sp, #64]
	ldp	x8, x9, [sp, #80]
	ldp	x10, x11, [sp, #96]
	ldp	x12, x13, [sp, #112]
	ldp	x14, x15, [sp, #128]
	ldp	x16, x17, [sp, #144]
	ldr	x18, [sp, #160]

	add	sp, sp, #RES_STACK_SZ		// drop the frame
.endm

// CALLER_RESTORE_SIMD
// Reloads q0 - q7 and q16 - q31 from the SIMD save area, using the same
// offsets as CALLER_SAVE_SIMD. The frame must still be reserved, so this has
// to run before CALLER_RESTORE_GP.
.macro CALLER_RESTORE_SIMD
	ldp	q0, q1, [sp, #176]
	ldp	q2, q3, [sp, #208]
	ldp	q4, q5, [sp, #240]
	ldp	q6, q7, [sp, #272]
	// q8 - q15 have no slot; the next slot holds q16
	ldp	q16, q17, [sp, #304]
	ldp	q18, q19, [sp, #336]
	ldp	q20, q21, [sp, #368]
	ldp	q22, q23, [sp, #400]
	ldp	q24, q25, [sp, #432]
	ldp	q26, q27, [sp, #464]
	ldp	q28, q29, [sp, #496]
	ldp	q30, q31, [sp, #528]
.endm

// EMULATE_AESNI func, src_dst, key
//
// Calls the routine func on in-memory copies of two vector registers and
// loads the first copy back into src_dst afterwards.
//
//   func    - symbol that is branched to with bl; it receives
//             x0 = address of the 16-byte copy of src_dst
//             x1 = address of the 16-byte copy of key
//   src_dst - vector register name (used with a .16b suffix); input operand
//             and destination of the result
//   key     - vector register name (used with a .16b suffix); input only
//
// x0 - x18, x29, x30, q0 - q7 and q16 - q31 are spilled before the call and
// reloaded after it, so apart from src_dst they hold their previous values
// when the macro ends. The address scratch register is x9: it is used only
// after CALLER_SAVE_GP has spilled it and is reloaded by CALLER_RESTORE_GP.
// (An unsaved callee-saved register such as x23 must not be used here, since
// it would be left pointing into the released frame.)
// The condition flags are not saved by this macro.
// When SAFE_DATA is defined, the SIMD save area and both argument buffers are
// zeroed before the frame is released.
.macro EMULATE_AESNI func, src_dst, key
	#define arg1 x0
	#define arg2 x1

	CALLER_SAVE_GP

	// Copy both operands into the argument buffers before any vector
	// register is touched; x9 is free to use because it was just spilled.
	add	x9, sp, ARG1_OFFSET
	st1	{\src_dst\().16b}, [x9]
	add	x9, sp, ARG2_OFFSET
	st1	{\key\().16b}, [x9]

	CALLER_SAVE_SIMD

	// fill in args for func
	add	arg1, sp, ARG1_OFFSET
	add	arg2, sp, ARG2_OFFSET

	bl	 \func

	CALLER_RESTORE_SIMD

	// Destination v register gets overwritten with result from func.
	// x9 is caller-saved, so its value is recomputed after the call.
	add	x9, sp, ARG1_OFFSET
	ld1	{\src_dst\().16b}, [x9]

#ifdef SAFE_DATA
	CLEAR_STACK
#endif
	// Reloads x9 along with the other spilled registers and drops the frame.
	CALLER_RESTORE_GP
.endm

// EMULATE_AARCH64_PMULL func, dst, src1, src2, imm
//
// Calls the routine func on in-memory copies of two vector registers plus an
// immediate selector, then loads the first copy back into dst.
//
//   func - symbol that is branched to with bl; it receives
//          x0 = address of the 16-byte copy of src1 (also read back as result)
//          x1 = address of the 16-byte copy of src2
//          x2 = imm
//   dst  - vector register name (used with a .16b suffix); receives the
//          16 bytes found in the first argument buffer after the call
//   src1 - vector register name (used with a .16b suffix); first input
//   src2 - vector register name (used with a .16b suffix); second input
//   imm  - value moved into x2 before the call
//
// x0 - x18, x29, x30, q0 - q7 and q16 - q31 are spilled before the call and
// reloaded after it, so apart from dst they hold their previous values when
// the macro ends. The address scratch register is x9: it is used only after
// CALLER_SAVE_GP has spilled it and is reloaded by CALLER_RESTORE_GP.
// (An unsaved callee-saved register such as x23 must not be used here, since
// it would be left pointing into the released frame.)
// The condition flags are not saved by this macro.
// When SAFE_DATA is defined, the SIMD save area and both argument buffers are
// zeroed before the frame is released.
.macro EMULATE_AARCH64_PMULL func, dst, src1, src2, imm
	#define arg1 x0
	#define arg2 x1
	#define arg3 x2

	CALLER_SAVE_GP

	// Copy both sources into the argument buffers before any vector
	// register is touched; x9 is free to use because it was just spilled.
	add	x9, sp, ARG1_OFFSET
	st1	{\src1\().16b}, [x9]
	add	x9, sp, ARG2_OFFSET
	st1	{\src2\().16b}, [x9]

	CALLER_SAVE_SIMD

	// fill in args for func
	add	arg1, sp, ARG1_OFFSET
	add	arg2, sp, ARG2_OFFSET
	mov	arg3, \imm

	bl	 \func

	CALLER_RESTORE_SIMD

	// Destination v register gets overwritten with result from func.
	// x9 is caller-saved, so its value is recomputed after the call.
	add	x9, sp, ARG1_OFFSET
	ld1	{\dst\().16b}, [x9]

#ifdef SAFE_DATA
	CLEAR_STACK
#endif
	// Reloads x9 along with the other spilled registers and drops the frame.
	CALLER_RESTORE_GP
.endm

// EMULATE_AESENCLAST src_dst, key, tmp
// Runs emulate_AESENCLAST through EMULATE_AESNI with src_dst as the in/out
// vector register and key as the second operand.
// tmp is accepted but not used by this macro.
.macro EMULATE_AESENCLAST src_dst, key, tmp
	EMULATE_AESNI func=emulate_AESENCLAST, src_dst=\src_dst, key=\key
.endm

// EMULATE_PMULL dst, src1, src2
// Runs emulate_PCLMULQDQ through EMULATE_AARCH64_PMULL with selector 0x00.
// NOTE(review): 0x00 presumably selects the low 64-bit half of each source,
// as in the x86 PCLMULQDQ immediate - confirm against emulate_PCLMULQDQ.
.macro EMULATE_PMULL dst, src1, src2
	EMULATE_AARCH64_PMULL func=emulate_PCLMULQDQ, dst=\dst, src1=\src1, src2=\src2, imm=0x00
.endm

// EMULATE_PMULL2 dst, src1, src2
// Runs emulate_PCLMULQDQ through EMULATE_AARCH64_PMULL with selector 0x11.
// NOTE(review): 0x11 presumably selects the high 64-bit half of each source,
// as in the x86 PCLMULQDQ immediate - confirm against emulate_PCLMULQDQ.
.macro EMULATE_PMULL2 dst, src1, src2
	EMULATE_AARCH64_PMULL func=emulate_PCLMULQDQ, dst=\dst, src1=\src1, src2=\src2, imm=0x11
.endm

#endif
